#!/bin/sh
#############################################################
# Configuration file for clone detection.
#

#############################################################################
# Parameters that commonly need to be changed:
# - FILE_PATTERN : what are the input file name patterns for Deckard
# - SRC_DIR : the root directory containing the source files
# - DECKARD_DIR : Where is the home directory of DECKARD
# - clone detection parameters: c.f. DECKARD's paper
#   -- MIN_TOKENS
#   -- STRIDE
#   -- SIMILARITY
# For Deckard2, also need to set
# - PDG_DIR : the subdir name under $SRC_DIR that contains PDG dot files
# - AST_DIR : the subdir name under $SRC_DIR that contains AST dot files
# - TYPE_FILE : the file that defines AST node type names and IDs
# - RELEVANT_NODEFILE : the file that defines relevant AST node type names
# - LEAF_NODEFILE : the file that defines leaf AST node type names
# - PARENT_NODEFILE : the file that defines parent AST node type names
# The above 4 parameters are hard-coded in Deckard1 for different languages,
# while Deckard2 extracts those out to make it configurable without the
# need to recompile.

# Since Deckard 1&2 use different parameters, please make sure
# the parameters are set correctly for either Deckard1 or Deckard2.
# TODO: make the check automatic.
#
# Deckard2 supports only dot; Deckard1 supports only java, c, php.
FILE_PATTERN="*.java"  # file name pattern; used in the 'find' command
# Root directory containing the source files.
SRC_DIR='src'
# Deckard2: subdirectory holding the PDG dot files, used for
#   find $SRC_DIR -ipath "*/$PDG_DIR/$FILE_PATTERN"
PDG_DIR='ddgs'
# Deckard2: subdirectory holding the AST dot files; each PDG should have an
# AST with the same name in this different folder.
AST_DIR='asts'
# Deckard2: node definition files.
#   TYPE_FILE         - AST node type names and IDs
#   RELEVANT_NODEFILE - relevant AST node type names
#   LEAF_NODEFILE     - leaf AST node type names
#   PARENT_NODEFILE   - parent AST node type names
TYPE_FILE="/home/lingxiao/projects/Deckard/testdata/deckard3/AstNodeTypeNamesIDs.txt"
RELEVANT_NODEFILE="/home/lingxiao/projects/Deckard/testdata/deckard3/AstRelevantNodes.txt"
LEAF_NODEFILE="/home/lingxiao/projects/Deckard/testdata/deckard3/AstLeafNodes.txt"
PARENT_NODEFILE="/home/lingxiao/projects/Deckard/testdata/deckard3/AstParentNodes.txt"

# Home directory of the Deckard installation.
DECKARD_DIR='/home/lingxiao/projects/Deckard/DeckardDebugging'
# Clone detection parameters; refer to the Deckard paper for their meaning.
# Each of them may hold a sequence of values.
MIN_TOKENS="30 50"     # integers
STRIDE="2 0"           # integers
SIMILARITY="1.0 0.95"  # values <= 1
#DISTANCE='0 0.70711 1.58114 2.236'

###########################################################
# Where to store result files?
#
# Output directory for the generated vectors.
VECTOR_DIR='vectors'
# Output directory for the detected clone clusters.
CLUSTER_DIR='clusters'
# Output directory for timing/debugging info.
TIME_DIR='times'

##########################################################
# where are several programs we need?
#
# Where is the vector generator?
# The executable is picked under $DECKARD_DIR/src according to the file type
# named by $FILE_PATTERN. An unsupported pattern is reported on stderr and
# VGEN_EXEC is pointed at the placeholder "invalidvecgen"; processing of this
# file continues either way.
VGEN_EXEC="$DECKARD_DIR/src"
case "$FILE_PATTERN" in
  *.dot )
    VGEN_EXEC="$VGEN_EXEC/dot2d/dotvgen" ;;
  *.java )
    VGEN_EXEC="$VGEN_EXEC/main/jvecgen" ;;
  *.php )
    VGEN_EXEC="$VGEN_EXEC/main/phpvecgen" ;;
  *.c | *.h )
    VGEN_EXEC="$VGEN_EXEC/main/cvecgen" ;;
  * )
    # Diagnostics belong on stderr so they cannot mix with regular output.
    printf '%s\n' "Error: invalid FILE_PATTERN: $FILE_PATTERN" >&2
    VGEN_EXEC="$VGEN_EXEC/invalidvecgen" ;;
esac
# Program that divides the vectors into groups.
GROUPING_EXEC="${DECKARD_DIR}/src/vgen/vgrouping/runvectorsort"
# LSH program used for vector clustering.
CLUSTER_EXEC="${DECKARD_DIR}/src/lsh/bin/enumBuckets"
# Program that post-processes the clone groups.
POSTPRO_EXEC="${DECKARD_DIR}/scripts/clonedetect/post_process_groupfile"
# Source-code-to-HTML transformer and its options; used by Deckard1 only.
SRC2HTM_EXEC='source-highlight'
SRC2HTM_OPTS='--line-number-ref'

############################################################
# For parallel processing
#
# Upper limit on the number of processes to be used (by xargs).
# - 0 means as many as possible (up to xargs)
MAX_PROCS='8'

##################################################################
# Some additional, internal parameters; can be ignored
#
# Maximal vector size for the first group; not really useful.
GROUPING_S="30"  # must be a single value
#GROUPING_D
#GROUPING_C

# Export the settings so that child processes see them in their environment.

# Deckard home and input selection.
export DECKARD_DIR FILE_PATTERN SRC_DIR PDG_DIR AST_DIR

# Node definition files.
export TYPE_FILE RELEVANT_NODEFILE LEAF_NODEFILE PARENT_NODEFILE

# Result directories.
export VECTOR_DIR TIME_DIR CLUSTER_DIR

# Helper programs and their options.
export VGEN_EXEC GROUPING_EXEC CLUSTER_EXEC POSTPRO_EXEC
export SRC2HTM_EXEC SRC2HTM_OPTS

# Parallelism limit; exported like every other tunable so that child
# processes can read it from their environment too.
export MAX_PROCS

# Clone detection parameters.
export MIN_TOKENS STRIDE SIMILARITY
#export DISTANCE

# Grouping parameters. GROUPING_D and GROUPING_C are only marked for export
# here; they reach child processes once a value is assigned to them.
export GROUPING_S GROUPING_D GROUPING_C

